library(ggplot2)
library(readxl)
library(readr)
library(tidyverse)
library(dplyr)
library(sf)
library(plotly)
library(geojsonio)
library(knitr)
library(tidyr)
# Load the dataset
df_descriptive=read_csv("data_final.csv")
data_final <- read_csv("data_final.csv")
# Grouping the data by year to count the number of incidents
df_descriptive$Year <- as.factor(df_descriptive$Year)
year_counts <- as.data.frame(table(df_descriptive$Year))
colnames(year_counts) <- c('Year', 'Count')
# Plotting the line chart for shooting incidents by year with data points labeled
ggplot(year_counts, aes(x = Year, y = Count, group = 1)) +
geom_line(color = 'blue') +
geom_point(size = 3) +
geom_text(aes(label = Count), vjust = -0.5, size = 3) +
labs(title = 'Shooting Incidents by Year in NYC',
x = 'Year',
y = 'Number of Shooting Incidents') +
theme_minimal()

The chart shows a stable trend in shooting incidents from 2017 to 2019, followed by a sharp increase in 2020, likely linked to socio-economic factors like the COVID-19 pandemic. Incidents peaked in 2021 and then declined through 2023.
# Grouping the data by month
df_descriptive$Month <- as.factor(df_descriptive$Month)
month_counts <- as.data.frame(table(df_descriptive$Month))
colnames(month_counts) <- c('Month', 'Count')
# Plotting the bar chart for shooting incidents by month
ggplot(month_counts, aes(x = Month, y = Count)) +
geom_bar(stat = 'identity', fill = 'skyblue') +
geom_text(aes(label = Count), vjust = -0.5) +
labs(title = 'Bar Chart of Shooting Incidents by Month in NYC',
x = 'Month',
y = 'Number of Shooting Incidents') +
theme_minimal()

The bar chart shows a seasonal trend in shooting incidents in NYC, with a significant increase during the warmer months, peaking in July (1248 incidents). The highest numbers are seen in June, July, and August, while the lowest are in February (494 incidents) and January (617 incidents), suggesting fewer incidents during colder months. The gradual rise from January to July, followed by a decline towards December, indicates a correlation between warmer weather and increased incidents, potentially due to increased outdoor activities.
# Convert OCCUR_TIME to a proper time format
df_descriptive$OCCUR_TIME <- format(strptime(df_descriptive$OCCUR_TIME, format = "%H:%M:%S"), "%H:%M:%S")
# Plotting the density of the occurrence time
ggplot(df_descriptive, aes(x = as.POSIXct(OCCUR_TIME, format = "%H:%M:%S"))) +
geom_density(fill = 'skyblue', alpha = 0.6) +
labs(title = 'Density Plot of Shooting Incident Occurrence Time in NYC',
x = 'Time of Occurrence',
y = 'Density') +
scale_x_datetime(date_labels = "%H:%M") +
theme_minimal()

The density plot shows changes in shooting incident trends throughout the day in NYC, with two distinct peaks. The first peak occurs in the late night to early morning hours, and the second peak appears in the evening. The lowest density is observed in the late morning to early afternoon.
sky_dark_counts <- df_descriptive %>%
group_by(Sky_Is_Dark) %>%
summarise(total_incidents = n())
ggplot(sky_dark_counts, aes(x = Sky_Is_Dark, y = total_incidents, fill = Sky_Is_Dark)) +
geom_bar(stat = 'identity') +
scale_x_discrete(labels = c("FALSE" = "Daytime", "TRUE" = "Nighttime")) + # Update x-axis labels
scale_fill_manual(values = c("FALSE" = "skyblue", "TRUE" = "darkblue"),
labels = c("FALSE" = "Daytime", "TRUE" = "Nighttime")) +
labs(title = 'Number of Shooting Incidents by time (Daytime vs. Nigttime)',
x = 'Time of a day(Daytime vs. Nigttime)',
y = 'Number of Shooting Incidents') +
theme_minimal()

The number of shooting incidents when the sky was dark is significantly higher than when it was bright. This might indicate that shootings are more likely to occur during nighttime or low visibility conditions. The higher number of incidents during dark conditions could be due to factors such as reduced visibility, higher activity at night, or fewer people around, making it easier for incidents to occur undetected.
# Load dataset
cdta_shape = st_read("nycdta2020_24d/nycdta2020.shp")
## Reading layer `nycdta2020' from data source
## `D:\Rproject_ds1\ZebangZHANGstat.github.io\nycdta2020_24d\nycdta2020.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 71 features and 8 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 913175.1 ymin: 120128.4 xmax: 1067383 ymax: 272844.3
## Projected CRS: NAD83 / New York Long Island (ftUS)
boro_shape = st_read("Borough Boundaries/geo_export_391a75ed-0ae4-4c88-8c30-3588c75bd01e.shp")
## Reading layer `geo_export_391a75ed-0ae4-4c88-8c30-3588c75bd01e' from data source `D:\Rproject_ds1\ZebangZHANGstat.github.io\Borough Boundaries\geo_export_391a75ed-0ae4-4c88-8c30-3588c75bd01e.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 5 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -74.25559 ymin: 40.49613 xmax: -73.70001 ymax: 40.91553
## Geodetic CRS: WGS 84
# Count the boro incident
boro_incident_counts <- data_final %>%
group_by(BORO) %>%
summarise(Number_of_Incidents = n(), .groups = "drop") %>%
mutate(BORO = tolower(BORO) )
# Lowercase the boro in boro_shape
boro_shape = boro_shape %>%
mutate(boro_name = tolower(boro_name))
# Merge spatial data with incident counts
boro_map_data <- boro_shape %>%
left_join(boro_incident_counts, by = c("boro_name" = "BORO"))
data_final <- read_csv("data_final.csv")
boro_map_data <- boro_map_data %>%
mutate(
hover_text = paste("Borough:", boro_name, "<br>Total Incidents:", Number_of_Incidents)
)
# Create the interactive plot with click functionality
plot <- plot_ly(
data = boro_map_data,
type = "scattermapbox",
split = ~boro_name, # Separate polygons by boroughs
color = ~Number_of_Incidents, # Color based on the number of incidents
colors = "viridis", # Use a color scale
text = ~hover_text, # Display hover text
hoverinfo = "text",
marker = list(size = 8, opacity = 0.7)
) %>%
layout(
title = "Total Number of Incidents Across NYC BOROs (2017-2023)",
mapbox = list(
style = "carto-positron", # Base map style
center = list(lon = -74.00, lat = 40.7128), # Center map on NYC
zoom = 9
)
)
# Add click functionality to display the borough name and number of incidents
plot <- plot %>%
event_register("plotly_click") %>%
htmlwidgets::onRender("
function(el, x) {
el.on('plotly_click', function(d) {
var point = d.points[0];
var text = point.text;
alert('You clicked on: ' + text);
});
}
")
# Display the interactive plot
plot
The map shows the total number of incidents across NYC boroughs (2017–2023), with each borough represented by a distinct color. A gradient is used to indicate the magnitude of incidents, with brighter shades corresponding to higher counts, ranging from 0 to over 3,000 incidents. Each borough is outlined and filled with its respective color, making it easy to distinguish. The legend on the right identifies the boroughs (Bronx, Brooklyn, Manhattan, Queens, Staten Island) and aligns with the color gradient to show the number of incidents. This visualization highlights geographical disparities in incident frequency across the boroughs, aiding in understanding spatial distribution patterns.
## There is space between letter and number in CDTA, delete the space below
data_final$CDTA <- gsub(" ", "", data_final$CDTA)
cdta_incident_counts <- data_final %>%
group_by(CDTA) %>%
summarise(Number_of_Incidents = n(), .groups = "drop")
# Remove any trailing spaces or mismatches in CDTA identifiers:
cdta_shape$CDTA2020 <- gsub(" ", "", cdta_shape$CDTA2020)
data_final$CDTA <- gsub(" ", "", data_final$CDTA)
# Identify Missing Matches
unmatched_cdta <- setdiff(cdta_shape$CDTA2020, data_final$CDTA)
#Re-Merge the Data
cdta_map_data <- cdta_shape %>%
left_join(cdta_incident_counts, by = c("CDTA2020" = "CDTA"))
# Update NA Handling
cdta_map_data <- cdta_map_data %>%
mutate(
Number_of_Incidents = ifelse(is.na(Number_of_Incidents), 0, Number_of_Incidents),
Incident_Range = cut(
Number_of_Incidents,
breaks = seq(0, 600, by = 120),
labels = c("0-120", "121-240", "241-360", "361-480", "481-600"),
include.lowest = TRUE
)
)
“QN80” “SI95” “QN84” are NAs. Therefore, these 3 CDTAs are assigned with 0 incident.
ggplot(data = cdta_map_data) +
geom_sf(aes(fill = Incident_Range), color = "white", size = 0.2) +
geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "black") + # Add labels+
scale_fill_manual(
values = c(
"0-120" = "#b2e2e2",
"121-240" = "skyblue",
"241-360" = "#66c2a4",
"361-480" = "#2ca25f",
"481-600" = "#006d2c"
),
name = "Number of Incidents"
) +
labs(
title = "Total Number of Incidents Across NYC CDTAs from 2017 to 2023",
subtitle = "Incidents grouped by range (0-600, 120 breaks)"
) +
theme_minimal() +
theme(
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank()
)

The map shows NYC CDTA incidents (2017–2023) using a gradient from light blue (fewer incidents) to dark green (more incidents), with counts labeled on each district. Dark green areas highlight hotspots, likely in densely populated regions, while lighter blue areas, like Staten Island, show fewer incidents. This visualization helps identify trends and prioritize safety efforts.
boroughs <- unique(cdta_map_data$BoroName)
for (b in boroughs) {
borough_data <- cdta_map_data %>%
filter(BoroName == b)
plot <- ggplot(data = borough_data) +
geom_sf(aes(fill = Number_of_Incidents), color = "black") +
geom_sf_text(aes(label = Number_of_Incidents), size = 3, color = "black") + # Add labels
scale_fill_gradientn(
colors = c("skyblue","green", "yellow", "red"), # Custom color scale
name = "Number of Incidents"
) +
labs(
title = paste("CDTA Incidents in", b),
subtitle = "2017 to 2023",
x = "Longitude",
y = "Latitude"
) +
theme_minimal()
print(plot)
}





The Bronx demonstrates significant variability in the number of incidents across its CDTAs, with central and southern regions experiencing the highest numbers of incidents, some exceeding 300. This highlights the borough as a hotspot for crime relative to others. Socioeconomic challenges, high population density, or structural inequities could contribute to these trends. Interventions should prioritize these high-incident areas by allocating additional law enforcement resources and community support programs to mitigate the causes of elevated crime levels.
Manhattan shows a relatively low overall number of incidents, except for a few northern neighborhoods, where one CDTA is a clear outlier with incidents surpassing 1200. This disparity suggests a concentration of crime in specific areas rather than borough-wide. The localized nature of the issue could be due to unique socioeconomic pressures or demographic factors in those neighborhoods. Targeted initiatives such as community policing or economic investment in northern Manhattan could help address these localized hotspots.
Queens generally exhibits a safer profile with most CDTAs reporting fewer than 100 incidents. However, one central area stands out with more than 400 incidents, marking it as a concern. The low number of incidents elsewhere reflects the suburban and less dense nature of the borough. Efforts in Queens should focus on analyzing the central hotspot to determine the underlying causes, such as potential economic or social stressors, and implementing programs to improve community safety.
Brooklyn presents a widespread distribution of incidents, with several CDTAs exceeding 500 incidents. The borough shows higher overall numbers compared to Queens and Manhattan, which may be linked to its high population density and mixed socioeconomic profile. The more even distribution of incidents suggests a need for borough-wide interventions rather than focusing on isolated areas. A combination of public safety initiatives, housing improvements, and community programs would be essential for addressing crime across the borough.
Staten Island stands out with the lowest number of incidents among the five boroughs. Most CDTAs report fewer than 50 incidents, with one area as an exception, exceeding 200 incidents. This aligns with Staten Island’s suburban and less densely populated character, which likely contributes to its lower crime rates. Maintaining this safety level requires continued focus on community engagement and ensuring sufficient resources are available to address any emerging hotspots.
Exploratory Data Analysis by Neighborhood Tabulation Area
# Table showing Top 10 NTAs with the Highest Total Shooting Incidents
top_10_nta_overall <- df_descriptive %>%
group_by(BORO, NTA) %>%
summarise(total_incidents = n(),
total_population = first(Total_population_nta),
percent_poverty = first(Percent_poverty),
percent_education = first(Percent_education)) %>%
arrange(desc(total_incidents)) %>%
ungroup() %>%
slice(1:10) %>%
select(NTA, BORO, total_incidents, total_population, percent_poverty, percent_education)
# Display the table in the desired format
kable(top_10_nta_overall, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023)")
| NTA | BORO | total_incidents | total_population | percent_poverty | percent_education |
|---|---|---|---|---|---|
| Brownsville | BROOKLYN | 350 | 60470 | 37.2 | 76.9 |
| Bedford-Stuyvesant (East) | BROOKLYN | 323 | 84653 | 22.2 | 87.8 |
| Harlem (North) | MANHATTAN | 253 | 83327 | 25.1 | 82.9 |
| Crown Heights (North) | BROOKLYN | 235 | 85275 | 20.5 | 84.1 |
| East Harlem (North) | MANHATTAN | 229 | 64655 | 32.3 | 75.9 |
| Mott Haven-Port Morris | BRONX | 207 | 57718 | 43.7 | 62.0 |
| Bedford-Stuyvesant (West) | BROOKLYN | 207 | 89189 | 26.0 | 84.8 |
| East New York-New Lots | BROOKLYN | 205 | 53004 | 25.3 | 82.6 |
| Concourse-Concourse Village | BRONX | 169 | 69387 | 31.9 | 70.5 |
| Mount Eden-Claremont (West) | BRONX | 166 | 49651 | 31.8 | 67.9 |
# Get the Top 10 NTAs with the Highest Incident Rate by Year
top_10_incident_rate <- df_descriptive %>%
distinct(incident_rate_by_year_nta, .keep_all = TRUE) %>%
filter(NTAType == 'Residential') %>%
arrange(desc(incident_rate_by_year_nta)) %>%
slice(1:10)
# Pull information for the Top 10 NTAs
top_10_nta_incident_rate <- top_10_incident_rate %>%
select(NTA, BORO,Year,incident_rate = incident_rate_by_year_nta,
total_population = Total_population_nta,
percent_poverty = Percent_poverty,
percent_education = Percent_education)
# Display the table in the desired format
kable(top_10_nta_incident_rate, digits = 5, caption = "Top 10 NTAs with the Highest Incident Rate in Each Borough")
| NTA | BORO | Year | incident_rate | total_population | percent_poverty | percent_education |
|---|---|---|---|---|---|---|
| Claremont Village-Claremont (East) | BRONX | 2021 | 0.18735 | 24553 | 43.2 | 61.7 |
| Brownsville | BROOKLYN | 2020 | 0.15380 | 60470 | 37.2 | 76.9 |
| Tremont | BRONX | 2020 | 0.11820 | 32150 | 36.3 | 71.4 |
| Inwood | MANHATTAN | 2021 | 0.11571 | 36299 | 15.3 | 77.0 |
| East New York-New Lots | BROOKLYN | 2020 | 0.10754 | 53004 | 25.3 | 82.6 |
| Hunts Point | BRONX | 2021 | 0.10574 | 15131 | 34.3 | 72.2 |
| Mount Eden-Claremont (West) | BRONX | 2021 | 0.10070 | 49651 | 31.8 | 67.9 |
| Brownsville | BROOKLYN | 2022 | 0.09922 | 60470 | 37.2 | 76.9 |
| Brownsville | BROOKLYN | 2021 | 0.09757 | 60470 | 37.2 | 76.9 |
| Tremont | BRONX | 2023 | 0.09642 | 32150 | 36.3 | 71.4 |
# Table showing Top 10 total incident NTA in Each Borough
incident_by_nta_borough <- df_descriptive %>%
drop_na() %>%
group_by(BORO, NTA) %>%
summarise(total_incidents = n()) %>%
arrange(BORO, desc(total_incidents)) %>%
group_by(BORO) %>%
slice_max(n = 10, order_by = total_incidents) %>%
select(-total_incidents) %>%
group_by(BORO) %>%
mutate(row_num = row_number()) %>%
pivot_wider(names_from = BORO, values_from = NTA) %>%
unnest(cols = c(BRONX, BROOKLYN, MANHATTAN, QUEENS, `STATEN ISLAND`)) %>%
select(row_num, everything())%>%
slice(1:10)
# Display the table in the desired format
kable(incident_by_nta_borough, caption = "Top 10 NTAs with Total Shooting Incidents in Each Borough (2017-2023)")
| row_num | BRONX | BROOKLYN | MANHATTAN | QUEENS | STATEN ISLAND |
|---|---|---|---|---|---|
| 1 | Mott Haven-Port Morris | Brownsville | East Harlem (North) | Far Rockaway-Bayswater | St. George-New Brighton |
| 2 | Mount Hope | Bedford-Stuyvesant (East) | Harlem (North) | Baisley Park | Tompkinsville-Stapleton-Clifton-Fox Hills |
| 3 | Tremont | Crown Heights (North) | East Harlem (South) | Richmond Hill | West New Brighton-Silver Lake-Grymes Hill |
| 4 | Mount Eden-Claremont (West) | East New York-New Lots | Harlem (South) | Jamaica | Mariner’s Harbor-Arlington-Graniteville |
| 5 | Concourse-Concourse Village | East Flatbush-Remsen Village | Washington Heights (South) | Rockaway Beach-Arverne-Edgemere | Annadale-Huguenot-Prince’s Bay-Woodrow |
| 6 | Williamsbridge-Olinville | East New York (North) | Chelsea-Hudson Yards | St. Albans | Port Richmond |
| 7 | Melrose | Bedford-Stuyvesant (West) | Hamilton Heights-Sugar Hill | South Ozone Park | Rosebank-Shore Acres-Park Hill |
| 8 | Belmont | Canarsie | Inwood | Elmhurst | Grasmere-Arrochar-South Beach-Dongan Hills |
| 9 | Longwood | Coney Island-Sea Gate | Chinatown-Two Bridges | South Jamaica | Great Kills-Eltingville |
| 10 | Fordham Heights | East New York-City Line | Washington Heights (North) | Astoria (East)-Woodside (North) | New Dorp-Midland Beach |
Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in MANHATTAN
top_10_nta_overall_manhattan <- df_descriptive %>%
filter(BORO == 'MANHATTAN') %>%
group_by(BORO, NTA) %>%
summarise(total_incidents = n(),
total_population = first(Total_population_nta),
percent_poverty = first(Percent_poverty),
percent_education = first(Percent_education)) %>%
arrange(desc(total_incidents)) %>%
ungroup() %>%
slice(1:10) %>%
select(NTA, total_incidents, total_population, percent_poverty, percent_education)
# Display the table in the desired format
kable(top_10_nta_overall_manhattan, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in MANHATTAN (2017-2023)")
| NTA | total_incidents | total_population | percent_poverty | percent_education |
|---|---|---|---|---|
| Harlem (North) | 253 | 83327 | 25.1 | 82.9 |
| East Harlem (North) | 229 | 64655 | 32.3 | 75.9 |
| East Harlem (South) | 145 | 59814 | 29.0 | 78.3 |
| Harlem (South) | 130 | 47113 | 18.8 | 87.4 |
| Inwood | 105 | 36299 | 15.3 | 77.0 |
| Washington Heights (South) | 88 | 72037 | 19.9 | 70.3 |
| Washington Heights (North) | 67 | 71842 | 14.8 | 79.3 |
| Hamilton Heights-Sugar Hill | 54 | 49410 | 24.0 | 78.6 |
| Manhattanville-West Harlem | 46 | 22183 | 26.7 | 77.4 |
| East Village | 41 | 71436 | 24.3 | 87.7 |
Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in MANHATTAN
top_10_incident_rate_manhattan <- df_descriptive %>%
distinct(incident_rate_by_year_nta, .keep_all = TRUE) %>%
filter(BORO == 'MANHATTAN') %>%
filter(NTAType == 'Residential') %>%
arrange(desc(incident_rate_by_year_nta)) %>%
slice(1:10)
# Pull information for the Top 10 NTAs
top_10_nta_incident_rate_manhattan <- top_10_incident_rate_manhattan %>%
select(NTA, Year, incident_rate = incident_rate_by_year_nta,
total_population = Total_population_nta,
percent_poverty = Percent_poverty,
percent_education = Percent_education)
# Display the table in the desired format
kable(top_10_nta_incident_rate_manhattan, digits = 6, caption = "Top 10 NTAs with the Highest Annual Incident Rate in MANHATTAN")
| NTA | Year | incident_rate | total_population | percent_poverty | percent_education |
|---|---|---|---|---|---|
| Inwood | 2021 | 0.115706 | 36299 | 15.3 | 77.0 |
| East Harlem (North) | 2022 | 0.086614 | 64655 | 32.3 | 75.9 |
| Harlem (North) | 2021 | 0.080406 | 83327 | 25.1 | 82.9 |
| Inwood | 2020 | 0.068872 | 36299 | 15.3 | 77.0 |
| Harlem (South) | 2021 | 0.065799 | 47113 | 18.8 | 87.4 |
| Harlem (North) | 2022 | 0.064805 | 83327 | 25.1 | 82.9 |
| East Harlem (North) | 2021 | 0.061867 | 64655 | 32.3 | 75.9 |
| Harlem (South) | 2022 | 0.057309 | 47113 | 18.8 | 87.4 |
| Harlem (North) | 2020 | 0.055204 | 83327 | 25.1 | 82.9 |
| East Harlem (South) | 2021 | 0.055171 | 59814 | 29.0 | 78.3 |
Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in BROOKLYN
top_10_nta_overall_brooklyn <- df_descriptive %>%
filter(BORO == 'BROOKLYN') %>%
group_by(BORO, NTA) %>%
summarise(total_incidents = n(),
total_population = first(Total_population_nta),
percent_poverty = first(Percent_poverty),
percent_education = first(Percent_education)) %>%
arrange(desc(total_incidents)) %>%
ungroup() %>%
slice(1:10) %>%
select(NTA, total_incidents, total_population, percent_poverty, percent_education)
# Display the table in the desired format
kable(top_10_nta_overall_brooklyn, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in BROOKLYN (2017-2023)")
| NTA | total_incidents | total_population | percent_poverty | percent_education |
|---|---|---|---|---|
| Brownsville | 350 | 60470 | 37.2 | 76.9 |
| Bedford-Stuyvesant (East) | 323 | 84653 | 22.2 | 87.8 |
| Crown Heights (North) | 235 | 85275 | 20.5 | 84.1 |
| Bedford-Stuyvesant (West) | 207 | 89189 | 26.0 | 84.8 |
| East New York-New Lots | 205 | 53004 | 25.3 | 82.6 |
| Canarsie | 166 | 89932 | 13.0 | 88.1 |
| East New York (North) | 148 | 42818 | 25.5 | 79.4 |
| East Flatbush-Remsen Village | 126 | 39115 | 18.7 | 83.7 |
| Ocean Hill | 120 | 37952 | 27.3 | 83.1 |
| Flatbush | 111 | 66503 | 13.7 | 87.8 |
Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in BROOKLYN
top_10_incident_rate_brooklyn <- df_descriptive %>%
distinct(incident_rate_by_year_nta, .keep_all = TRUE) %>%
filter(BORO == 'BROOKLYN') %>%
filter(NTAType == 'Residential') %>%
arrange(desc(incident_rate_by_year_nta)) %>%
slice(1:10)
# Pull information for the Top 10 NTAs
top_10_nta_incident_rate_brooklyn <- top_10_incident_rate_brooklyn %>%
select(NTA, Year, incident_rate = incident_rate_by_year_nta,
total_population = Total_population_nta,
percent_poverty = Percent_poverty,
percent_education = Percent_education)
# Display the table in the desired format
kable(top_10_nta_incident_rate_brooklyn, digits = 6, caption = "Top 10 NTAs with the Annual Highest Incident Rate in BROOKLYN")
| NTA | Year | incident_rate | total_population | percent_poverty | percent_education |
|---|---|---|---|---|---|
| Brownsville | 2020 | 0.153795 | 60470 | 37.2 | 76.9 |
| East New York-New Lots | 2020 | 0.107539 | 53004 | 25.3 | 82.6 |
| Brownsville | 2022 | 0.099223 | 60470 | 37.2 | 76.9 |
| Brownsville | 2021 | 0.097569 | 60470 | 37.2 | 76.9 |
| Bedford-Stuyvesant (East) | 2020 | 0.088597 | 84653 | 22.2 | 87.8 |
| East New York (North) | 2020 | 0.079406 | 42818 | 25.5 | 79.4 |
| East New York-New Lots | 2021 | 0.079239 | 53004 | 25.3 | 82.6 |
| East Flatbush-Remsen Village | 2020 | 0.071584 | 39115 | 18.7 | 83.7 |
| Ocean Hill | 2021 | 0.071142 | 37952 | 27.3 | 83.1 |
| Brownsville | 2018 | 0.069456 | 60470 | 37.2 | 76.9 |
Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in BRONX
top_10_nta_overall_bronx <- df_descriptive %>%
filter(BORO == 'BRONX') %>%
group_by(BORO, NTA) %>%
summarise(total_incidents = n(),
total_population = first(Total_population_nta),
percent_poverty = first(Percent_poverty),
percent_education = first(Percent_education)) %>%
arrange(desc(total_incidents)) %>%
ungroup() %>%
slice(1:10) %>%
select(NTA, total_incidents, total_population, percent_poverty, percent_education)
# Display the table in the desired format
kable(top_10_nta_overall_bronx, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in BRONX (2017-2023)")
| NTA | total_incidents | total_population | percent_poverty | percent_education |
|---|---|---|---|---|
| Mott Haven-Port Morris | 207 | 57718 | 43.7 | 62.0 |
| Concourse-Concourse Village | 169 | 69387 | 31.9 | 70.5 |
| Mount Eden-Claremont (West) | 166 | 49651 | 31.8 | 67.9 |
| Williamsbridge-Olinville | 158 | 61346 | 21.0 | 76.6 |
| Melrose | 152 | 42651 | 40.7 | 68.2 |
| Mount Hope | 149 | 49099 | 30.2 | 66.8 |
| Tremont | 138 | 32150 | 36.3 | 71.4 |
| Longwood | 130 | 40289 | 32.0 | 66.9 |
| Belmont | 116 | 35825 | 39.6 | 65.6 |
| Fordham Heights | 112 | 32099 | 33.2 | 66.0 |
Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in BRONX
top_10_incident_rate_bronx <- df_descriptive %>%
distinct(incident_rate_by_year_nta, .keep_all = TRUE) %>%
filter(BORO == 'BRONX') %>%
filter(NTAType == 'Residential') %>%
arrange(desc(incident_rate_by_year_nta)) %>%
slice(1:10)
# Pull information for the Top 10 NTAs
top_10_nta_incident_rate_bronx <- top_10_incident_rate_bronx %>%
select(NTA, Year, incident_rate = incident_rate_by_year_nta,
total_population = Total_population_nta,
percent_poverty = Percent_poverty,
percent_education = Percent_education)
# Display the table in the desired format
kable(top_10_nta_incident_rate_bronx, digits = 6, caption = "Top 10 NTAs with the Highest Annual Incident Rate in BRONX")
| NTA | Year | incident_rate | total_population | percent_poverty | percent_education |
|---|---|---|---|---|---|
| Claremont Village-Claremont (East) | 2021 | 0.187350 | 24553 | 43.2 | 61.7 |
| Tremont | 2020 | 0.118196 | 32150 | 36.3 | 71.4 |
| Hunts Point | 2021 | 0.105743 | 15131 | 34.3 | 72.2 |
| Mount Eden-Claremont (West) | 2021 | 0.100703 | 49651 | 31.8 | 67.9 |
| Tremont | 2023 | 0.096423 | 32150 | 36.3 | 71.4 |
| West Farms | 2021 | 0.094307 | 20147 | 44.8 | 61.1 |
| Longwood | 2021 | 0.091836 | 40289 | 32.0 | 66.9 |
| Mount Hope | 2023 | 0.089615 | 49099 | 30.2 | 66.8 |
| West Farms | 2022 | 0.084380 | 20147 | 44.8 | 61.1 |
| Morrisania | 2020 | 0.082431 | 37607 | 35.7 | 65.9 |
Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in QUEENS
top_10_nta_overall_queens <- df_descriptive %>%
filter(BORO == 'QUEENS') %>%
group_by(BORO, NTA) %>%
summarise(total_incidents = n(),
total_population = first(Total_population_nta),
percent_poverty = first(Percent_poverty),
percent_education = first(Percent_education)) %>%
arrange(desc(total_incidents)) %>%
ungroup() %>%
slice(1:10) %>%
select(NTA, total_incidents, total_population, percent_poverty, percent_education)
# Display the table in the desired format
kable(top_10_nta_overall_queens, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in QUEENS (2017-2023)")
| NTA | total_incidents | total_population | percent_poverty | percent_education |
|---|---|---|---|---|
| Far Rockaway-Bayswater | 116 | 58648 | 18.3 | 79.9 |
| Rockaway Beach-Arverne-Edgemere | 100 | 41367 | 22.1 | 82.1 |
| St. Albans | 93 | 51816 | 8.6 | 88.8 |
| South Jamaica | 91 | 44401 | 13.4 | 78.9 |
| Baisley Park | 83 | 43090 | 11.4 | 83.5 |
| Jamaica | 82 | 60993 | 15.7 | 70.4 |
| Queensbridge-Ravenswood-Dutch Kills | 79 | 32954 | 16.5 | 85.0 |
| South Ozone Park | 66 | 79540 | 9.7 | 76.7 |
| Richmond Hill | 55 | 34100 | 10.6 | 79.8 |
| Laurelton | 45 | 26088 | 5.4 | 91.2 |
Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in QUEENS
top_10_incident_rate_queens <- df_descriptive %>%
distinct(incident_rate_by_year_nta, .keep_all = TRUE) %>%
filter(BORO == 'QUEENS') %>%
filter(NTAType == 'Residential') %>%
arrange(desc(incident_rate_by_year_nta)) %>%
slice(1:10)
# Pull information for the Top 10 NTAs
top_10_nta_incident_rate_queens <- top_10_incident_rate_queens %>%
select(NTA, Year, incident_rate = incident_rate_by_year_nta,
total_population = Total_population_nta,
percent_poverty = Percent_poverty,
percent_education = Percent_education)
# Display the table in the desired format
kable(top_10_nta_incident_rate_queens, digits = 6, caption = "Top 10 NTAs with the Highest Annual Incident Rate in QUEENS")
| NTA | Year | incident_rate | total_population | percent_poverty | percent_education |
|---|---|---|---|---|---|
| South Jamaica | 2020 | 0.069818 | 44401 | 13.4 | 78.9 |
| Rockaway Beach-Arverne-Edgemere | 2020 | 0.067687 | 41367 | 22.1 | 82.1 |
| Far Rockaway-Bayswater | 2020 | 0.064793 | 58648 | 18.3 | 79.9 |
| Queensbridge-Ravenswood-Dutch Kills | 2020 | 0.054622 | 32954 | 16.5 | 85.0 |
| Laurelton | 2020 | 0.053665 | 26088 | 5.4 | 91.2 |
| Rockaway Beach-Arverne-Edgemere | 2021 | 0.050765 | 41367 | 22.1 | 82.1 |
| Richmond Hill | 2023 | 0.046921 | 34100 | 10.6 | 79.8 |
| Queensbridge-Ravenswood-Dutch Kills | 2019 | 0.045518 | 32954 | 16.5 | 85.0 |
| St. Albans | 2021 | 0.044388 | 51816 | 8.6 | 88.8 |
| Richmond Hill | 2022 | 0.043988 | 34100 | 10.6 | 79.8 |
Table showing Top 10 NTAs with the Highest Total Shooting Incidents (2017-2023) in STATEN ISLAND
top_10_nta_overall_staten <- df_descriptive %>%
filter(BORO == 'STATEN ISLAND') %>%
group_by(BORO, NTA) %>%
summarise(total_incidents = n(),
total_population = first(Total_population_nta),
percent_poverty = first(Percent_poverty),
percent_education = first(Percent_education)) %>%
arrange(desc(total_incidents)) %>%
ungroup() %>%
slice(1:10) %>%
select(NTA, total_incidents, total_population, percent_poverty, percent_education)
# Display the table in the desired format
kable(top_10_nta_overall_staten, digits = 2, caption = "Top 10 NTAs with the Highest Total Shooting Incidents in STATEN ISLAND (2017-2023)")
| NTA | total_incidents | total_population | percent_poverty | percent_education |
|---|---|---|---|---|
| St. George-New Brighton | 48 | 20549 | 18.5 | 79.9 |
| Mariner’s Harbor-Arlington-Graniteville | 46 | 33492 | 16.8 | 83.3 |
| Tompkinsville-Stapleton-Clifton-Fox Hills | 42 | 19027 | 25.5 | 79.2 |
| Rosebank-Shore Acres-Park Hill | 34 | 25510 | 14.4 | 81.7 |
| West New Brighton-Silver Lake-Grymes Hill | 26 | 37010 | 9.9 | 91.2 |
| Port Richmond | 23 | 22609 | 22.0 | 83.2 |
| Grasmere-Arrochar-South Beach-Dongan Hills | 8 | 36259 | 11.3 | 85.4 |
| Annadale-Huguenot-Prince’s Bay-Woodrow | 6 | 40534 | 6.0 | 92.6 |
| New Dorp-Midland Beach | 5 | 29083 | 8.5 | 84.6 |
| Arden Heights-Rossville | 4 | 30683 | 4.3 | 92.3 |
Table showing Top 10 NTAs with the Highest Annual Shooting Incident Rate in STATEN ISLAND
top_10_incident_rate_staten <- df_descriptive %>%
distinct(incident_rate_by_year_nta, .keep_all = TRUE) %>%
filter(BORO == 'STATEN ISLAND') %>%
filter(NTAType == 'Residential') %>%
arrange(desc(incident_rate_by_year_nta)) %>%
slice(1:10)
# Pull information for the Top 10 NTAs
top_10_nta_incident_rate_staten <- top_10_incident_rate_staten %>%
select(NTA, Year, incident_rate = incident_rate_by_year_nta,
total_population = Total_population_nta,
percent_poverty = Percent_poverty,
percent_education = Percent_education)
# Display the table in the desired format
kable(top_10_nta_incident_rate_staten, digits = 6, caption = "Top 10 NTAs with the Highest Annual Incident Rate in STATEN ISLAND")
| NTA | Year | incident_rate | total_population | percent_poverty | percent_education |
|---|---|---|---|---|---|
| St. George-New Brighton | 2023 | 0.058397 | 20549 | 18.5 | 79.9 |
| Tompkinsville-Stapleton-Clifton-Fox Hills | 2021 | 0.052557 | 19027 | 25.5 | 79.2 |
| Mariner’s Harbor-Arlington-Graniteville | 2020 | 0.047773 | 33492 | 16.8 | 83.3 |
| Port Richmond | 2020 | 0.044230 | 22609 | 22.0 | 83.2 |
| St. George-New Brighton | 2021 | 0.043798 | 20549 | 18.5 | 79.9 |
| St. George-New Brighton | 2017 | 0.038931 | 20549 | 18.5 | 79.9 |
| Mariner’s Harbor-Arlington-Graniteville | 2017 | 0.038815 | 33492 | 16.8 | 83.3 |
| Tompkinsville-Stapleton-Clifton-Fox Hills | 2017 | 0.036790 | 19027 | 25.5 | 79.2 |
| Rosebank-Shore Acres-Park Hill | 2020 | 0.035280 | 25510 | 14.4 | 81.7 |
| St. George-New Brighton | 2022 | 0.034065 | 20549 | 18.5 | 79.9 |